************************
* The limits of ‘Western’ supply chain sustainability governance to halt deforestation
* Lead Author: Christoph Kubitza
* Email: Christoph.Kubitza@giga-hamburg.de
* Date: 03.7.2025

*** new gen do-file from 2023 **
/* Since several macros are defined in this do-file it should be run completely.
Otherwise locals are dropped and commands may not work*/
/*Download csv from LMI webiste and adjust date of file and folder (eg "20230420") as well as paths if needed*/

	clear
	clear matrix
	set more off
	capture log close

	* install necessary ado-files 
	cap ssc install kountry
	cap ssc install multencode

	* This do-file insheets the data and does all the labelling, first droppings (size, year)
	* use your own path:)
    * Christoph's path
	global analysis "C:\Users\christoph.kubitza\OneDrive - GIGA\Land Matrix\General\Stata_LM\Beta Data"
	global datapath "C:\Users\christoph.kubitza\OneDrive - GIGA\Land Matrix\General\Stata_LM\Rawdata"
	global rawdata "C:\Users\christoph.kubitza\OneDrive - GIGA\Land Matrix\General\Stata_LM\Rawdata"

	import delimited "$datapath\download20230420\locations.csv", delimiter(";") maxquotedrows(unlimited) bindquote(strict)  clear
	bysort dealid: gen loc=_n
	rename * location*
	rename locationdealid dealid
	reshape wide locationid location@spatialaccuracylevel location@location location@point location@facilityname location@locationdescription location@commentonlocation , i( dealid ) j( locationloc )
	save "$analysis\download20230420\location.dta", replace

	import delimited "$datapath\download20230420\contracts.csv", delimiter(";") maxquotedrows(unlimited) bindquote(strict)  clear
 	bysort dealid: gen contract=_n
	drop id
	 reshape wide contractnumber contractdate contractexpirationdate durationoftheagreement commentoncontract , i( dealid ) j( contract) 
	save "$analysis\download20230420\contracts.dta", replace

	import delimited "$datapath\download20230420\datasources.csv", delimiter(";") maxquotedrows(unlimited) bindquote(strict) clear
	bysort dealid: gen loc=_n
	rename * datasource*
	rename datasourcedealid dealid
	drop datasourceid
	reshape wide datasource@datasourcetype datasource@url datasource@file datasource@publicationtitle datasource@date datasource@name datasource@organisation datasource@email datasource@phone datasource@opencontractingid datasource@commentondatasource , i( dealid ) j( datasourceloc )
	save "$analysis\download20230420\datasource.dta", replace
	
	import delimited "$datapath\download20230420\investors.csv", delimiter(";") maxquotedrows(unlimited) bindquote(strict)  clear

	save "$analysis\download20230420\investors.dta", replace

	import delimited "$datapath\download20230420\deals.csv", delimiter(";") maxquotedrows(unlimited) bindquote(strict) clear 

	cd "$analysis"
	cap log using Log\datamanagement_beta.log, replace

/// CAREFUL: When changing locals named "numberX" the naming of all following locals has to be revised.
************************************************************************************************
	*** Labelling of data and variables, splitting, and dropping of redundant variables
************************************************************************************************
	label data "This file contains all the data from the Land Matrix"

	label variable dealid "ID"
	  

**** HOST COUNTRY INFO ****
		rename targetcountry Country_Host_long
		label variable Country_Host_long "Host Country of Investment" 
		 

		 
		* kountry command to get the UN country codes							
			kountry Country_Host_long, from(other) stuck
			rename _ISO3N_ Country_Host_numerical
			replace Country_Host_numerical=384 if Country_Host_long=="CÃ´te d'Ivoire"
			replace Country_Host_numerical=638 if Country_Host_long=="Réunion"
			replace Country_Host_numerical=678 if Country_Host_long=="SÃ£o TomÃ© and Principe"
			replace Country_Host_numerical=831 if Country_Host_long=="Guernsey"
			replace Country_Host_numerical=833 if Country_Host_long=="Isle of Man"
			
			rename NAMES_STD NAMES_STD2
			kountry Country_Host_numerical, from(iso3n) to (iso3c) geo(undet)
			rename GEO Regions_Target
			rename _ISO3C_ Country_Host
	

		label var Country_Host "Target country of deal"
		label var Regions_Target "Target world region"
		
		*** Aggregate world regions to match World Bank aggregation
		gen Regions_Target_agg=Regions_Target
		replace Regions_Target_agg="East Asia and Pacific" if Regions_Target=="Melanesia" | Regions_Target=="South-Eastern Asia" | Regions_Target=="Eastern Asia" 
		replace Regions_Target_agg="Europe and Central Asia" if Regions_Target=="Eastern Europe" | Regions_Target=="Northern Europe" | Regions_Target=="Southern Europe" | Regions_Target=="Central Asia" | Regions_Target=="Western Asia"
		replace Regions_Target_agg="Latin America and the Caribbean" if Regions_Target=="Central America" |  Regions_Target=="South America" |  Regions_Target=="Caribbean"
		replace Regions_Target_agg="Middle East and North Africa" if Regions_Target=="Northern Africa" | Regions_Target=="Western Asia" 
		replace Regions_Target_agg="South Asia" if Regions_Target=="Southern Asia" 
		replace Regions_Target_agg="Sub-Saharan Africa" if Regions_Target=="Southern Africa" | Regions_Target=="Eastern Africa"  | Regions_Target=="Middle Africa" | Regions_Target=="Western Africa"

		
**** INVESTOR INFO ****	
	**** TOP PARENT COMPANIES ****

		* several investors in one variable (one row in Excel) topparentcompanies
		* Insa explanation: After split command below: topparentcopanies1  = Name of first parent company, topparentcompanies2 = ID of first parent company,  topparentcompanies3 = Country of parent company, ... topparentcompanies4 = starts with name of second parent company, ...
		split topparentcompanies, p("|" "#")
		drop topparentcompanies
		ds topparentcompanies*
		local number3:  word count `r(varlist)'
		display `number3'
		forvalues i= 2(3)`number3' { // see explanation above
			local j=`i'-1
			rename topparentcompanies`i' investorid`j'
			destring investorid`j', replace
		}
		
		* Topparentcompanies3, topparentcompanies6, topparentcompanies9, etc. are empty
		forvalues i=3(3)`number3' {
		local j=`i'-2
		drop topparentcompanies`i' 
	    }
			
		forvalues i=1(3)`number3' {	
		rename investorid`i' investorid
		merge m:1 investorid using "$analysis\download20230420\investors.dta", keep(mat mas) keepus(countryofregistrationorigin classification) nogen
		rename  investorid investorid`i'
		rename countryofregistrationorigin investorcountry`i'
		rename classification classification_inv`i'
		}
		
		
		* kountry command to get the UN country codes
		forvalues i=1(3)`number3' {
			kountry investorcountry`i', from(other) stuck
			rename _ISO3N_ Country_Inv`i'_num
			replace Country_Inv`i'_num=384 if investorcountry`i'=="CÃ´te d'Ivoire"
			replace Country_Inv`i'_num=638 if investorcountry`i'=="RÃ©union"
			replace Country_Inv`i'_num=826 if investorcountry`i'=="United Kingdom of Great Britain and Northern Ireland"
			replace Country_Inv`i'_num=831 if investorcountry`i'=="Guernsey"
			replace Country_Inv`i'_num=833 if investorcountry`i'=="Isle of Man"
			replace Country_Inv`i'_num=832 if investorcountry`i'=="Jersey"
			drop NAMES_STD
			kountry Country_Inv`i'_num, from(iso3n) to (iso3c) geo(men)
			replace GEO="Europe" if Country_Inv`i'_num==831
			replace _ISO3C_="GGY" if Country_Inv`i'_num==831
			replace _ISO3C_="JEY" if Country_Inv`i'_num==832
			replace GEO="Europe" if Country_Inv`i'_num==832
			replace GEO="Europe" if Country_Inv`i'_num==833
			replace _ISO3C_="IMN" if Country_Inv`i'_num==833
			rename investorcountry`i' Country_Inv`i'_long
			rename GEO Regions_Country_Inv`i'
			rename _ISO3C_ Country_Inv`i'

			rename topparentcompanies`i' investorname`i'
			label variable investorname`i' "Name of the investor"
			replace investorname`i' ="UNKNOWN" if strpos(investorname`i', "Unknown") | strpos(investorname`i', "Unnamed") ///
			| strpos(investorname`i', "n/a") | strpos(investorname`i', "private investors") ///
			| strpos(investorname`i', "Various Zimbabwean individuals") | strpos(investorname`i', "UAE investors")
			}

				
			*****  Merge investor classification for first, secondary, tertiary, etc. investor
            label define classification_inv_lab  1 "Asset management firm" 2 "Government/government institution" ///
            3 "Individual entrepreneur" 4 "Investment Bank" 5 "Investment Fund" 6 "Non - Profit organization" ///
            7 "Private company" 8 "Private equity firm" 9 "Semi state-owned company" 10 "State-/ government(-owned) company" ///
            11 "Stock-exchange listed company" 12 "Other (please specify)" 13 "No information"

            forvalues i=1(3)`number3' {
            gen classification_Inv`i'=.
            replace classification_Inv`i'=1 if classification_inv`i'=="Asset management firm"
            replace classification_Inv`i'=2 if classification_inv`i'=="Government/government institution" | classification_inv`i'=="Government" | classification_inv`i'=="Government institution" | classification_inv`i'=="Multilateral Development Bank (MDB)" | classification_inv`i'=="Bilateral Development Bank / Development Finance Institution"
            replace classification_Inv`i'=3 if classification_inv`i'=="Individual entrepreneur"
            replace classification_Inv`i'=4 if classification_inv`i'=="Investment Bank" 
            replace classification_Inv`i'=5 if classification_inv`i'=="Investment fund" | classification_inv`i'=="Investment Fund (all types incl. pension, hedge, mutual, private equity funds etc.)" | classification_inv`i'=="Insurance firm"
            replace classification_Inv`i'=6 if classification_inv`i'=="Non - Profit organization (e.g. Church, University etc.)"
            replace classification_Inv`i'=7 if classification_inv`i'=="Private company"
            replace classification_Inv`i'=8 if classification_inv`i'=="Private equity firm"
            replace classification_Inv`i'=9 if classification_inv`i'=="Semi state-owned company"
            replace classification_Inv`i'=10 if classification_inv`i'=="State-/government (owned) company"
            replace classification_Inv`i'=11 if classification_inv`i'=="Stock-exchange listed company"          
            replace classification_Inv`i'=12 if classification_inv`i'=="Other (please specify in comment field)"
            replace classification_Inv`i'=13 if classification_inv`i'=="No information"
            label variable classification_Inv`i' "Classification investor"
            label value classification_Inv`i' classification_inv_lab
            drop classification_inv`i'
             }
			
			local counter =1
			forvalues i=1(3)`number3' {
			rename classification_Inv`i' classification_Inv`counter'
			rename Regions_Country_Inv`i' Regions_Country_Inv`counter'
			rename Country_Inv`i' Country_Inv`counter'
			rename investorid`i' investorid`counter'
			rename investorname`i' investorname`counter'
			rename Country_Inv`i'_num Country_Inv`counter'_num
			local ++counter
			}
			 
			
**** OPERATING COMPANIES ****

		* Marking unknown operating companies (primary investors)
		replace operatingcompanyname="UNKNOWN" if strpos(operatingcompanyname, "(Unnamed investor ")
		replace operatingcompanyname="UNKNOWN" if operatingcompanyname=="Unknown ()"
		replace operatingcompanyname="UNKNOWN" if operatingcompanyname==""
		replace operatingcompanyname="UNKNOWN" if strpos(operatingcompanyname, "Unknown (Unknown Investor)") ///
		| strpos(operatingcompanyname, "Unknown (private investors)") 
		replace operatingcompanyname="UNKNOWN" if strpos(operatingcompanyname, "Unknown (Unknown)") ///
		| strpos(operatingcompanyname, "Unknown (South African Investors)") 
		replace operatingcompanyname="UNKNOWN" if strpos(operatingcompanyname, "Unknown (UNKNOWN)") ///
		| strpos(operatingcompanyname, "Unknown (Investors from Saudi Arabia)")
		replace operatingcompanyname="UNKNOWN" if strpos(operatingcompanyname, "Unknown (Individual entrepreneurs)") ///
		| strpos(operatingcompanyname, "Unknown (Chinese businessmen)")
		replace operatingcompanyname="UNKNOWN" if strpos(operatingcompanyname, "Unknown (Chinese Investors)") ///
		| strpos(operatingcompanyname, " Unknown (Businessman and politician)")
	
		

**** DEAL INFO ****	
	*** CONTRACT SIZE OF DEALS
		rename sizeundercontractleasedorpurchas contractsize_leasepurchase
		rename sizeinoperationproductioninha size2_operation

			* structure of variables (year#current#size) - needs to be split into three different variables (captures dynamics/changes in deal size)
			* year of contract size
			foreach var of varlist contractsize_leasepurchase size2_operation {
				split `var', p("|" "#")
			}
			drop contractsize_leasepurchase size2_operation
			ds contractsize_leasepurchase*
			local number4:  word count `r(varlist)'
			display `number4'
			* if day and month are not given Stata automatically sets the date to 01janYYYY
			local number5 = `number4'-2
			forvalues i=1(3)`number5' {
				gen double size_contract_year`i'=date(contractsize_leasepurchase`i', "YMD")
				replace size_contract_year`i' =date(contractsize_leasepurchase`i', "Y") if size_contract_year`i'==.
				format size_contract_year`i' %td
			}
			
			* current contract size?
			local number6 = `number4'-1
			forvalues i=2(3)`number6' {
				local j=`i'-1
				rename contractsize_leasepurchase`i' size_contract`j'_cur
				label var size_contract`j'_cur "most current contract size of the deal"
			}
			
			* Contract size in ha
			forvalues i= 3(3)`number4' {
				local j=`i'-2
				destring contractsize_leasepurchase`i', replace
				rename contractsize_leasepurchase`i' size_contract`j'
			}

		* Generate ultimate contract size - size_contract_year gives the most recent information on deal size!
			gen size_contract_year=size_contract_year1
			forvalues i=1(3)`number5' {
				replace size_contract_year=size_contract_year`i' if size_contract`i'_cur=="current"
				replace size_contract_year=size_contract_year`i' if ///
				(size_contract_year`i'> size_contract_year & size_contract_year`i'!=.) ///
				| (size_contract_year==.  & size_contract_year`i'!=.)
				replace size_contract_year=. if size_contract`i'!=. & size_contract_year`i'==.
			}
			format size_contract_year %td

			gen size_contract = .
			forvalues i=1(3)`number5' {
				replace size_contract = size_contract`i' if size_contract`i'!=. 
			}
			forvalues i=1(3)`number5' {
				replace size_contract=size_contract`i' if size_contract_year`i'==size_contract_year ///
				& size_contract_year`i'!=.
			}
			forvalues i=1(3)`number5' {
				replace size_contract=size_contract`i' if size_contract`i'_cur=="current"
			}
			label variable size_contract "Current size of the deal in hectare - under contract"
			label variable size_contract_year "Year of the current size of the deal in hectare - under contract"
			
			/* size_contract`i' and size_contract_year`i' display contract sizes for single years. 
			size_operation`i' and size_operation_year`i' display operation sizes for single years.
			I do not delete these variables in case we want to analyse deal sizes over time */
			
				forvalues i=1(3)`number5' {
				replace size_contract=size_contract`i' if size_contract`i'_cur=="current"
			}
			
			local counter =1
			forvalues i=1(3)`number5' {
			rename size_contract`i' size_contract`counter'
			rename size_contract`i'_cur size_contract`counter'_cur
			rename size_contract_year`i' size_contract_year`counter'
			rename contractsize_leasepurchase`i' contractsize_leasepurchase`counter'
			local ++counter
			}
			
	**** OPERATION SIZE OF DEALS
		ds size2_operation*
		local number7:  word count `r(varlist)'
		display `number7'
		
		* Date of operation size
		* if day and month are not given Stata automatically sets the date to 01janYYYY
		local number8 = `number7'-2
		forvalues i=1(3)`number8' {
			gen double size_operation_year`i'=date(size2_operation`i', "YMD")
			replace size_operation_year`i' =date(size2_operation`i', "Y") if size_operation_year`i'==.
			format size_operation_year`i' %td
			drop size2_operation`i'
		}
		
		* current operation size?
		local number9 = `number7'-1
		forvalues i=2(3)`number9' {
			local j=`i'-1
			rename size2_operation`i' size_operation`j'_cur
		}

		* operation size in ha
		forvalues i= 3(3)`number7' {
			local j=`i'-2
			destring size2_operation`i', replace
			rename size2_operation`i' size_operation`j'
		}

		* Generate ultimate production size 
		gen size_operation_year=size_operation_year1
		forvalues i=1(3)`number8' {
			replace size_operation_year=size_operation_year`i' if size_operation`i'_cur=="current"
			replace size_operation_year=size_operation_year`i' if ///
			(size_operation_year`i'> size_operation_year & size_operation_year`i'!=.) ///
			| (size_operation_year==.  & size_operation_year`i'!=.)
			replace size_operation_year=. if size_operation`i'!=. & size_operation_year`i'==.
		}
		format size_operation_year %td

		gen size_operation = .
		forvalues i=1(3)`number8' {
			replace size_operation = size_operation`i' if size_operation`i'!=. 
		}
		forvalues i=1(3)`number8' {
			replace size_operation=size_operation`i' if size_operation_year`i'==size_operation_year ///
			& size_operation_year`i'!=.
		}
		forvalues i=1(3)`number8' {
			replace size_operation=size_operation`i' if size_operation`i'_cur=="current"
		}
		
			local counter =1
			forvalues i=1(3)`number8' {
			rename size_operation`i'_cur size_operation`counter'_cur 
			rename size_operation_year`i' size_operation_year`counter'
			rename size_operation`i' size_operation`counter'
			local ++counter
			}
		 
		label variable size_operation "Current size of the deal in hectare - in operation(production)"
		label variable size_operation_year "Year of the current size of the deal in hectare - in operation(production)"

		* Generate ultimate contract size
		gen  size_contract_mod = .
		replace size_contract_mod = size_contract
		replace size_contract_mod = size_operation if missing(size_contract)
		label variable size_contract_mod "Size under contract - modified: if missing, replaced by operation size"

		*intended size/intended_size_mod
		rename intendedsize size_intended 
		label variable size_intended "Intended size of the deal in hectare"

		gen size_intended_mod = .
		replace size_intended_mod = size_intended 
		replace size_intended_mod = size_contract if missing(size_intended_mod) 
		replace size_intended_mod = size_operation if missing(size_intended_mod)
		label variable size_intended_mod "Intended size of the deal in hectare - modified: if missing, replaced by contract size resp. operation size"

			*size classes
			gen size_class     = 1 if  size_contract >= 200   &  size_contract <= 2000
			replace size_class = 2 if  size_contract > 2000   &  size_contract <= 5000
			replace size_class = 3 if  size_contract > 5000   &  size_contract <= 10000
			replace size_class = 4 if  size_contract > 10000  &  size_contract <= 20000
			replace size_class = 5 if  size_contract > 20000  &  size_contract <= 50000
			replace size_class = 6 if  size_contract > 50000  &  size_contract <= 200000
			replace size_class = 7 if  size_contract > 200000 &  size_contract != .
			replace size_class = 0 if size_contract  < 200
			label define size_class 0"<200" 1"200 to 2.000" 2"2.001 to 5.000" 3"5.001 to 10.000" 4"10.001 to 20.000" 5"20.001 to 50.000" 6"50.001 to 200.000" 7"more than 200.000" 
			label variable size_class "Size class"
			label value size_class size_class
			tab size_class, mis

			
	*** NEGOTIATION STATUS
		split negotiationstatus, p("|" "#")
		rename negotiationstatus old_negotiationstatus
		ds negotiationstatus*
		local number10:  word count `r(varlist)'
		display `number10'

		* if day and month are not given Stata automatically sets the date to 01janYYYY
		local number11 = `number10'-2
		forvalues i=1(3)`number11' {
			gen double negotiationstatus_year`i'=date(negotiationstatus`i', "YMD")
			replace negotiationstatus_year`i' =date(negotiationstatus`i', "Y") if negotiationstatus_year`i'==.
			format negotiationstatus_year`i' %td
			drop negotiationstatus`i'
		}

		local number12 = `number10'-1
		forvalues i=2(3)`number12' {
			local j=`i'-1
			rename negotiationstatus`i' negotiation_status`j'_cur
		}

		forvalues i= 3(3)`number10' {
			local j=`i'-2
			rename negotiationstatus`i' negotiation_status`j'
		}

		forvalues i=1(3)`number11' {
			gen negotiationstatus`i'=1 if negotiation_status`i'=="Intended (Expression of interest)"
			replace negotiationstatus`i'=2 if negotiation_status`i'=="Intended (Under negotiation)"
			replace negotiationstatus`i'=3 if negotiation_status`i'=="Intended (Memorandum of understanding)"
			replace negotiationstatus`i'=4 if negotiation_status`i'=="Concluded (Oral Agreement)"
			replace negotiationstatus`i'=5 if negotiation_status`i'=="Concluded (Contract signed)"
			replace negotiationstatus`i'=6 if negotiation_status`i'=="Failed (Negotiations failed)"
			replace negotiationstatus`i'=7 if negotiation_status`i'=="Failed (Contract cancelled)"
			replace negotiationstatus`i'=8 if negotiation_status`i'=="Contract expired"
			replace negotiationstatus`i'=9 if negotiation_status`i'=="Change of ownership"
			replace negotiationstatus`i'=10 if negotiation_status`i'==""
			drop negotiation_status`i'
		}

		*Generate ultimate negotiation status
		gen negotiation_status_year=negotiationstatus_year1
		forvalues i=1(3)`number11' {
			replace negotiation_status_year=negotiationstatus_year`i' if negotiation_status`i'_cur=="current"
			replace negotiation_status_year=negotiationstatus_year`i' if ///
			(negotiationstatus_year`i' > negotiation_status_year & negotiationstatus_year`i'!=.) ///
			| (negotiation_status_year==. & negotiationstatus_year`i'!=.)
			replace negotiation_status_year=. if negotiationstatus`i' !=10 & negotiationstatus_year`i'==.
		}	
		format negotiation_status_year %td

		gen negotiation_status=10
		forvalues i=1(3)`number11' {
			replace negotiation_status=negotiationstatus`i' if negotiationstatus`i'!=. ///
			& negotiationstatus`i'!=10 
		}
		forvalues i=1(3)`number11' {
			replace negotiation_status=negotiationstatus`i' if negotiationstatus_year`i'==negotiation_status_year ///
			& negotiationstatus_year`i'!=. 
		}
		forvalues i=1(3)`number11' {
			replace negotiation_status=negotiationstatus`i' if negotiation_status`i'_cur=="current"
		}

		forvalues i=1(3)`number11' {
			recode negotiationstatus`i' (1=11) (2=20) (3=30) (4=40) (5=50) (6=60) (7=70) 
			recode negotiationstatus`i' (40=1) (50=2) (11=3) (20=4) (60=5) (70=6) (30=7) (.=10)
		}
		recode negotiation_status  (1=11) (2=20) (3=30) (4=40) (5=50) (6=60) (7=70) 
		recode negotiation_status (40=1) (50=2) (11=3) (20=4) (60=5) (70=6) (30=7) (.=10)

		# delimit; 
		label define negotiationstatus 
		1 "Oral Agreement" 
		2 "Contract signed" 
		3 "Expression of interest" 
		4 "Under negotiation" 
		5 "Negotiations failed" 
		6 "Contract cancelled" 
		7 "Memorandum of Understanding"
		8 "Contract expired"
		9 "Change of ownership"
		10 "No information";
		# delimit cr
		label values negotiation_status negotiationstatus
		forvalues i=1(3)`number11' {
			label values negotiationstatus`i' negotiationstatus
		}
		label var negotiation_status "Current negotiation status"
		label var negotiation_status_year "Year of current negotiation status"	

		tab currentnegotiationstatus
		tab negotiation_status
		drop currentnegotiationstatus 
		

		 
		
	*** IMPLEMENTATION STATUS
		split implementationstatus, p("|" "#")
		rename implementationstatus old_implementationstatus
		ds implementationstatus*
		local number13: word count `r(varlist)'
		display `number13'

		* if day and month are not given Stata automatically sets the date to 01janYYYY
		local number14 = `number13'-2
		forvalues i=1(3)`number14' {
			gen double implementationstatus_year`i'=date(implementationstatus`i', "YMD")
			replace implementationstatus_year`i' =date(implementationstatus`i', "Y") if implementationstatus_year`i'==.
			format implementationstatus_year`i' %td
			drop implementationstatus`i'
		}

		local number15 = `number13'-1
		forvalues i=2(3)`number15' {
			local j=`i'-1
			rename implementationstatus`i' implementation_status`j'_cur
		}

		forvalues i= 3(3)`number13' {
			local j=`i'-2
			rename implementationstatus`i' implementation_status`j'
		}

		label define implementationstatus 1 "Project not started" 2 "Startup phase (no production)" 3 "In operation (production)" 4 "Project abandoned" 5 "No information"
		forvalues i=1(3)`number14' {
			encode implementation_status`i', gen(implementationstatus`i') label(implementationstatus) // destring
			recode implementationstatus`i' (.=5)
			drop implementation_status`i'
		}

		*Generate ultimate implementation status
		gen implementation_status_year=implementationstatus_year1 
		forvalues i=1(3)`number14' {
			replace implementation_status_year=implementationstatus_year`i' if implementation_status`i'_cur=="current"
			replace implementation_status_year =implementationstatus_year`i' if implementationstatus_year`i'>implementation_status_year ///
			& implementationstatus_year`i'!=. | (negotiation_status_year==. & negotiationstatus_year`i'!=.)
			replace implementation_status_year=. if implementationstatus`i'!=5 & implementationstatus_year`i'==.
		}
		format implementation_status_year %td

		gen implementation_status = 5
		forvalues i=1(3)`number14' {
			replace implementation_status = implementationstatus`i' if implementationstatus`i'!=5 ///
			& implementationstatus`i'!=.
		}
		forvalues i=1(3)`number14' {
			replace implementation_status = implementationstatus`i' if implementationstatus_year`i'==implementation_status_year ///
			& implementationstatus_year`i' !=.
		}
		forvalues i=1(3)`number14' {
			replace implementation_status=implementationstatus`i' if implementation_status`i'_cur=="current"
		}

		forvalues i=4(3)`number14' {
			replace implementation_status_year=. if implementation_status==implementationstatus`i' ///
			& implementationstatus_year`i'==.
		} 

		label values implementation_status implementationstatus
		label var implementation_status "Current implementation status"
		label var implementation_status_year "Year of current implementation status"	
		drop currentimplementationstatus 
		


	**** Dummy =1 if initiation year before 2000 (Base filter B3)
		
		gen d_inistatus=0
		forvalues i=1(3)`number11' {
			replace d_inistatus=1 if negotiationstatus`i'<10 & negotiationstatus`i'!=3 & negotiationstatus_year`i'<td(1jan2000)
		}
		forvalues i=1(3)`number14' {
			replace d_inistatus=1 if implementationstatus`i'>=2 & implementationstatus`i'<=4 & implementationstatus_year`i'<td(1jan2000)
		}
			
					local counter =1
			forvalues i=1(3)`number14' {
			rename implementation_status`i'_cur implementation_status`counter'_cur 
			rename implementationstatus_year`i' implementationstatus_year`counter'
			rename implementationstatus`i' implementationstatus`counter'
			local ++counter
			}
			
					local counter =1
			forvalues i=1(3)`number11' {
			rename negotiation_status`i'_cur negotiation_status`counter'_cur 
			rename negotiationstatus_year`i' negotiationstatus_year`counter'
			rename negotiationstatus`i' negotiationstatus`counter'
			local ++counter
			}
			
	*** DEAL SIZE ***
		gen deal_size= size_intended_mod if negotiation_status == 3 | negotiation_status == 4 ///
		| negotiation_status==7 | negotiation_status==5
		replace deal_size= size_contract_mod if negotiation_status ==1 | negotiation_status==2 ///
		| negotiation_status==6 | negotiation_status==8 | negotiation_status==9
		 label var deal_size "deal size in ha (regardless of deal status)"

	*** INTENTION OF INVESTMENT ***
	// Insa explaination of code: There is one intention variable which is split by "," first. Then we have the name of intention for variables intention2 - intention6. intention1 further contains information on size, year, current - so intentions one need to be further split by "#" seperating year, current, size, name and "|" separating different crops for which this information is available. The reason why the intentions are sometimes separted by "," (without information on size, year, current) is that this additional information is only available for some deals. 
 	
		*** Goal of this code: To end up with up to 10 variables of intentions of investment 
		*** Some give information on current information status
		*** Some give information on size of the specific investment intention type (e.g. 1000 ha for food crops, 1000 ha for other activities)
		
		
		rename intentionofinvestment intentionof
		order intentionof
		split intentionof, p("|")
		rename intentionof old_intentionof
		ds intentionof*
		local number16: word count `r(varlist)'
		display `number16'

		forvalues t=1/`number16' {
		split intentionof`t', p("#") gen(intent`t')
		drop intentionof`t'
		ds intent`t'*
		local number117: word count `r(varlist)'
		display `number117'
		gen intentionof`t'4 = intent`t'4
		gen intention`t'2_cur = intent`t'2
		gen double intention`t'1_year=date(intent`t'1, "YMD")
		replace intention`t'1_year=date(intent`t'1, "Y") if intention`t'1_year==.
		format intention`t'1_year %td
		replace intent`t'3="0" if intent`t'3=="None"
		destring intent`t'3, gen(intention`t'3_size)
		drop intent`t'1
		drop intent`t'2
		drop intent`t'3
		drop intent`t'4
		rename intentionof`t'4 intentionof`t'
		rename intention`t'2_cur intention`t'_cur
		rename intention`t'1_year intention`t'_year
		rename intention`t'3_size intention`t'_size
		}

		
		# delimit; 
		label define intention
		1 "Biofuels"
		2 "Food crops"
		3 "Livestock"
		4 "Non-food agricultural commodities"
		5 "Agriculture unspecified"
		6 "Timber plantation (for wood and fibre)"
		7 "For carbon sequestration/REDD"
		8 "Forestry unspecified"
		9 "Mining"
		10 "Tourism"
		11 "Industry"
		12 "Conservation"
		13 "Renewable Energy"
		14 "Forest logging / management (for wood and fibre)"
		15 "Oil / Gas extraction"
		16 "Other (please specify)"
		17 "No information";
		# delimit cr

		ds intentionof*
		local number17: word count `r(varlist)'
		display `number17'

		forvalues t=1/`number17' {
		split intentionof`t', p(",") gen(intentionof`t'_)	
		ds intentionof`t'_*
		local number21: word count `r(varlist)'
		display `number21'
		forvalues i=1/`number21' {
		replace intentionof`t'_`i'=strtrim(intentionof`t'_`i')
		encode intentionof`t'_`i', gen(intention`t'_`i') label(intention)
		recode intention`t'_`i' (.=17)
		}
		order intention`t'_*, after (intentionof`t')
		drop intentionof`t'
		}
		drop intentionof*

				
	*** CROP PRODUCED *** ///
		
		rename cropsarea crops
		order crops
		split crops, p("|")
		rename crops old_crops
		ds crops*
		local number116: word count `r(varlist)'
		display `number116'
		
		forvalues t=1/1 {
		split crops`t', p("#") gen(product`t')
		drop crops`t'
		ds product`t'*
		local number117: word count `r(varlist)'
		display `number117'

		local counter `=`number116'+1'
		forvalues i=`number117'/`number117' { 
			gen crops`counter' = product`t'`i'
			local ++counter
			drop product`t'`i'
			}
		macro drop _counter

		local counter `=`number116'+1'
		local number118=`number117'-4
		forvalues i=`number118'/`number118' {
			gen crop`counter'_cur = product`t'`i'
			local ++counter
			drop product`t'`i'
		}
		macro drop _counter

		local counter `=`number116'+1'
		local number119=`number117'-5
		forvalues i=`number119'/`number119' {
			gen double crop`counter'_year=date(product`t'`i', "YMD")
			replace crop`counter'_year=date(product`t'`i', "Y") if crop`counter'_year==.
			format crop`counter'_year %td
			local ++counter
			drop product`t'`i'
		}
		macro drop _counter

		local counter `=`number116'+1'
		local number120=`number117'-3
		forvalues i=`number120'/`number120' {
			destring product`t'`i', gen(crop`counter'_size)
			local ++counter
			drop product`t'`i'
		}
		
		local counter `=`number116'+1'
		local number121=`number117'-2
		forvalues i=`number121'/`number121' {
			destring product`t'`i', gen(crop`counter'_yield)
			local ++counter
			drop product`t'`i'
		}

				local counter `=`number116'+1'
		local number122=`number117'-1
		forvalues i=`number122'/`number122' {
			destring product`t'`i', gen(crop`counter'_export)
			local ++counter
			drop product`t'`i'
		}


		local counter `=`number116'+1'
		rename crops`counter' crops`t'
		rename crop`counter'_cur crop`t'_cur
		rename crop`counter'_size crop`t'_size
		rename crop`counter'_year crop`t'_year
		rename crop`counter'_yield crop`t'_yield
		rename crop`counter'_export crop`t'_export

		}

		ds crops*
		local number177: word count `r(varlist)'
		display `number177'

	forvalues t=2/`number177' {
		split crops`t', p("#") gen(product`t')
		drop crops`t'
		ds product`t'*
		local number117: word count `r(varlist)'
		display `number117'
		
		local counter `=`number116'+1'
		forvalues i=`number117'/`number117' { 
			gen crops`counter' = product`t'`i'
			local ++counter
			drop product`t'`i'
			}
		macro drop _counter

		local counter `=`number116'+1'
		local number118=`number117'-4
		forvalues i=`number118'/`number118' {
			gen crop`counter'_cur = product`t'`i'
			local ++counter
			drop product`t'`i'
		}
		macro drop _counter

		local counter `=`number116'+1'
		local number119=`number117'-5
		forvalues i=`number119'/`number119' {
			gen double crop`counter'_year=date(product`t'`i', "YMD")
			replace crop`counter'_year=date(product`t'`i', "Y") if crop`counter'_year==.
			format crop`counter'_year %td
			local ++counter
			drop product`t'`i'
		}
		macro drop _counter

		local counter `=`number116'+1'
		local number120=`number117'-3
		forvalues i=`number120'/`number120' {
			destring product`t'`i', gen(crop`counter'_size)
			local ++counter
			drop product`t'`i'
		}
		
		local counter `=`number116'+1'
		local number121=`number117'-2
		forvalues i=`number121'/`number121' {
			destring product`t'`i', gen(crop`counter'_yield)
			local ++counter
			drop product`t'`i'
		}

				local counter `=`number116'+1'
		local number122=`number117'-1
		forvalues i=`number122'/`number122' {
			destring product`t'`i', gen(crop`counter'_export)
			local ++counter
			drop product`t'`i'
		}

		local counter `=`number116'+1'
		rename crops`counter' crops`t'
		rename crop`counter'_cur crop`t'_cur
		rename crop`counter'_size crop`t'_size
		rename crop`counter'_year crop`t'_year
		rename crop`counter'_yield crop`t'_yield
		rename crop`counter'_export crop`t'_export

	}		

		ds crops*
		local number116: word count `r(varlist)'
		display `number116'
		
	forvalues t=1/`number116' {
		split crops`t', p(",") gen(product`t')
		drop crops`t'
		ds product`t'*
		local number117: word count `r(varlist)'
		display `number117'

		forvalues i=1/`number117' { 
			gen crops`t'_`i' = product`t'`i'
			local ++counter
			drop product`t'`i'
			order crops`t'_`i', before(crop`t'_size)
			}
	}
		macro drop _counter


		# delimit; 
		label define crop
		1 "Accacia"
		2 "Alfalfa"
		3 "Almond"
		4 "Aloe Vera" 
		5 "Apple"
		
		6 "Aquaculture (unspecified crops)"
		7 "Bamboo"
		8 "Banana"
		9 "Barley"
		10 "Bean"
		
		11 "Buckwheat"
		12 "Cacao"
		13 "Canola"
		14 "Cashew"
		15 "Cassava (Maniok)"
		                  
		16 "Castor Oil Plant"
		17 "Cereals (unspecified)"
		18 "Cherries"
		19 "Citrus Fruits (unspecified)"
		20 "Coconut"
		
		21 "Coffee Plant"
		22 "Corn (Maize)"
		23 "Cotton"
		24 "Croton"
		25 "Dill"
		
		26 "Eucalyptus"
		27 "Fig-Nut"
		28 "Flowers (unspecified)"
		29 "Fodder Plants (unspecified)"
		30 "Food crops (unspecified)"
		
		31 "Fruit (unspecified)"
		32 "Grains (unspecified)"
		33 "Grapes"
		34 "Herbs (unspecified)"
		35 "Jatropha"
		
		36 "Lentils"
		37 "Mango"
		38 "Oil Palm"
		39 "Oil Seeds (unspecified)"
		40 "Oleagionous plant"
		
		41 "Olives"
		42 "Onion"
		43 "Other crops (please specify)"
		44 "Palms"
		45 "Papaya"
		
		46 "Passion fruit"
		47 "Peanut (groundnut)"
		48 "Peas"
		49 "Pepper"
		50 "Pine"
		
		51 "Pineapple"
		52 "Pomegranate"
		53 "Pongamia Pinnata"
		54 "Potatoes"
		55 "Pulses (unspecified)"
		
		56 "Rapeseed"
		57 "Rice"
		58 "Rice (hybrid)"
		59 "Roses"
		60 "Rubber tree"
		
		61 "Rye"
		62 "Seaweed / Macroalgae(unspecified)"
		63 "Seeds Production (unspecified)"
		64 "Sesame"
		65 "Sisal"
		
		66 "Sorghum"
		67 "Soya Beans"
		68 "Sugar (unspecified)"
		69 "Sugar Cane"
		70 "Sugar beet"
		
		71 "Sun Flower"
		72 "Sweet Potatoes"
		73 "Tea"
		74 "Teak"
		75 "Tobacco"
		
		76 "Tomatoes"
		77 "Trees (unspecified)"
		78 "Vegetables (unspecified)"
		79 "Vineyard"
		80 "Wheat"
		999 "No information";
		# delimit cr


		ds crop*_size
		local number116: word count `r(varlist)'
		display `number116'

	forvalues i=1/`number116' {
		ds crops`i'_*
		local number119: word count `r(varlist)'
		display `number119'
		forvalues t=1/`number119'{
		replace crops`i'_`t'=strtrim(crops`i'_`t')
		encode crops`i'_`t', gen(crop`i'_`t') label(crop)
		recode crop`i'_`t' (.=999)
		}
	}
		drop crops*

		
	*** FORMER USE ETC
		* Former land cover
		rename formerlandcover former_cover
		split former_cover, p("|")
		drop former_cover
		ds former_cover*
		local number24:  word count `r(varlist)'
		display `number24'

		multencode former_cover1-former_cover`number24', gen (former_lcover1-former_lcover`number24')
		foreach x of varlist former_lcover1-former_lcover`number24'{
			recode  `x' (1=1 "Cropland") (2=2 "Forestland") (3=4 "Marginal Land")(4 .=7 "No information") (6=3 "Shrub_Grasland") (5=5 "Pasture") (7=6 "Wetland") , gen(`x'_lab)
			
		} //Please note: Other Land to be labeled as "no information"
		 
		drop former_cover* former_lcover1-former_lcover`number24'

		forvalues i=1/`number24' {
			rename former_lcover`i'_lab former_cover`i'
		}

		gen former_cover= former_cover1
		forvalues i=2/`number24' {
			replace former_cover= former_cover`i' if former_cover==5 & former_cover`i'!=5
		}
		label values former_cover former_lcover1_lab

	* Former landowner
	rename formerlandowner former_owner
	split former_owner, p("|")
	drop former_owner
	ds former_owner*
	local number25:  word count `r(varlist)'
	display `number25'
	multencode former_owner1-former_owner`number25', gen (former_lowner1-former_lowner`number25')
	foreach x of varlist former_lowner1-former_lowner`number25' {
		recode  `x' (1=4 "Community") (2=5 "Indigenous people") (4=3 "Private (large-scale farm)")(5=2 "Private (Smallholders)") (6=1 "State") (3 .=6 "No information") , gen(`x'_lab)
	} // Please note: Other to be labeled as "no information 

	tab former_lowner1 former_lowner1_lab,mis
	drop former_owner* former_lowner1-former_lowner`number25'

	forvalues i=1/`number25' {
		rename former_lowner`i'_lab former_owner`i'
	}

	* Former landuse
	rename formerlanduse former_use
	split former_use, p("|")
	drop former_use
	ds former_use*
	local number26:  word count `r(varlist)'
	display `number26'
	multencode former_use1-former_use`number26', gen (former_luse1-former_luse`number26')
	foreach x of varlist former_luse1-former_luse`number26'{
		recode  `x' (1=1 "Commercial (large-scale) agriculture") (2=2 "Conservation") (3=3 "Forestry") ///
		(4=4 "Hunting/Gathering") (5 .=8 "No information") (6=5 "Pastoralists") (7=6 "Shifting cultivation") (8=7 "Smallholder agriculture"), gen(`x'_lab)
	} // Please note: Other to be labeled as "no information 

	drop former_use* former_luse1-former_luse`number26'

	forvalues i=1/`number26' {
		rename former_luse`i'_lab former_use`i'
	}

	* Split variables
	split negativeimpactsforlocalcommuniti, p("|") gen(negimpact_localcomm)
	drop negativeimpactsforlocalcommuniti

	split natureofthedeal, p("|") gen(natureofthedeal)
	drop natureofthedeal

	split materializedbenefitsforlocalcomm, p("|") gen(matbenefits_localcomm)
	drop materializedbenefitsforlocalcomm

	split promisedbenefitsforlocalcommunit, p("|") gen(prombenefits_localcomm)
	drop promisedbenefitsforlocalcommunit

	split sourceofwaterextraction, p("|") gen(source_waterextraction)
	drop sourceofwaterextraction

	
*** Order variables
	order dealid ///
		size_operation size_operation_year size_contract_mod ///
		size_contract_year size_intended  ///
		former_cover* former_owner* ///
		former_use*	implementation_status negotiation_status // investor data in seperate dataset

	
	ds Regions_Country_Inv*
	local number178: word count `r(varlist)'
	display `number178'

	forvalues i=1/`number178' {
	order Regions_Country_Inv`i' Country_Inv`i' classification_Inv`i', after(investorid`i')
	}
	
	ds crop*_size
	local number116: word count `r(varlist)'
	display `number116'

	forvalues i=1/`number116' {
	order crop`i'_size crop`i'_yield crop`i'_export crop`i'_cur crop`i'_year, before(crop`i'_1)
	}

	drop old_negotiationstatus old_intentionof old_implementationstatus old_crops former_cover Country_Inv*_long Country_Inv*_num
	sort dealid

	merge 1:1 dealid using "$analysis\download20230420\location.dta", nogen keep(mat mas)
	merge 1:1 dealid using "$analysis\download20230420\datasource.dta", nogen keep(mat mas)
	merge 1:1 dealid using "$analysis\download20230420\contracts.dta", nogen keep(mat mas)

***************************************************************************************
*** Some final additional data manipulations, corrections, droppings
***************************************************************************************

	*** 1) Create an agriculture dummy - so that we can easily drop all non-agricultural projects!
	gen d_agri = 0

	ds intention*_size
	local number130: word count `r(varlist)'
	display `number130'

	forvalues i=1/`number130' {
	ds intention`i'_*
	local number140: word count `r(varlist)'
	display `number140'
	local j=`number140'-3
	forvalues t=1/`j' {
	replace d_agri = 1 if intention`i'_`t'>= 1 & intention`i'_`t'<= 5 & intention`i'_cur=="current"
	replace d_agri = 1 if intention`i'_`t'==18 & intention`i'_cur=="current"
	}
	}

	*Forestry deals
	gen d_forestry= 0

	ds intention*_size
	local number130: word count `r(varlist)'
	display `number130'

	forvalues i=1/`number130' {
	ds intention`i'_*
	local number140: word count `r(varlist)'
	display `number140'
	local j=`number140'-3
	forvalues t=1/`j' {
	replace d_forestry=1 if intention`i'_`t'>=7 & intention`i'_`t'<=8 & intention`i'_cur=="current"
	replace d_forestry=1 if intention`i'_`t'==19 & intention`i'_cur=="current"
	replace d_forestry=1 if intention`i'_`t'==22 & intention`i'_cur=="current"
	}
	}

	*Delete not necessary vars
	drop NAMES_STD2 d_inistatus
	
***ORDERING
	*Investors
	order investorname1 investorname1 investorid1 Country_Inv1 Regions_Country_Inv1 Country_Inv1 classification_Inv1 , after(Regions_Target_agg)
	ds classification_Inv*
	local number41:  word count `r(varlist)'
	display `number41'
	local counter =1
	forvalues i= 2/`number41' { 
	order investorname`i' investorname`i' investorid`i' Country_Inv`i' Regions_Country_Inv`i' Country_Inv1 classification_Inv`i' , before(investorname`counter')
	}
	order investorname1 investorname1 investorid1 Country_Inv1 Regions_Country_Inv1 Country_Inv1 classification_Inv1 , after(Regions_Target_agg)
	
	*contract
	ds classification_Inv*
	local number42:  word count `r(varlist)'
	display `number42'
	order size_contract1 contractsize_leasepurchase1 size_contract1_cur size_contract_year1 , after(classification_Inv`number42')
	ds size_contract*_cur
	local number41:  word count `r(varlist)'
	display `number41'
	local counter =1
	forvalues i= 2/`number41' { 
	order size_contract`i' contractsize_leasepurchase`i' size_contract`i'_cur size_contract_year`i' , before(size_contract`counter')
	}
	ds classification_Inv*
	local number42:  word count `r(varlist)'
	display `number42'
	order size_contract1 contractsize_leasepurchase1 size_contract1_cur size_contract_year1 , after(classification_Inv`number42')
	
	*Operation
	ds size_contract*_cur
	local number42:  word count `r(varlist)'
	display `number42'
	order size_operation1 size_operation1_cur size_operation_year1 , after(size_contract_year`number42')
	ds size_operation*_cur
	local number41:  word count `r(varlist)'
	display `number41'
	local counter =1
	forvalues i= 2/`number41' { 
	order size_operation`i' size_operation_year`i' size_operation`i'_cur , before(size_operation`counter')
	}
	ds size_contract*_cur
	local number42:  word count `r(varlist)'
	display `number42'
	order size_operation1 size_operation_year1 size_operation1_cur  , after(size_contract_year`number42')
	
	*Negotation status
	ds size_operation*_cur
	local number42:  word count `r(varlist)'
	display `number42'
	order negotiationstatus1 negotiation_status1_cur negotiationstatus_year1 , after(size_operation`number42'_cur)
	ds negotiation_status*_cur
	local number41:  word count `r(varlist)'
	display `number41'
	local counter =1
	forvalues i= 2/`number41' { 
	order negotiationstatus`i' negotiation_status`i'_cur negotiationstatus_year`i' , before(negotiationstatus`counter')
	}
	ds size_operation*_cur
	local number42:  word count `r(varlist)'
	display `number42'
	order negotiationstatus1 negotiation_status1_cur negotiationstatus_year1 , after(size_operation`number42'_cur)
	
	*implementation
	ds negotiationstatus_year*
	local number42:  word count `r(varlist)'
	display `number42'
	order implementationstatus1 implementation_status1_cur  implementationstatus_year1 , after(negotiationstatus_year`number42')
	ds implementation_status*_cur
	local number41:  word count `r(varlist)'
	display `number41'
	local counter =1
	forvalues i= 2/`number41' { 
	order implementationstatus`i' implementation_status`i'_cur  implementationstatus_year`i' , before(implementationstatus`counter')
	}
	ds negotiationstatus_year*
	local number42:  word count `r(varlist)'
	display `number42'
	order implementationstatus1 implementation_status1_cur  implementationstatus_year1 , after(negotiationstatus_year`number42')

order size_contract size_intended_mod deal_size size_class negotiation_status_year implementation_status_year , after( size_intended)

save "$analysis\download20230420\deals20230420.dta", replace



